library(ggplot2)
library(dplyr)
library(fst)
library(reshape2)
library(miceadds)

load("main_data_local.rdata")

# make vector of continuing municipalities: 

kom_cont <- 
  c("101", "147", "151", "153", "155", "157", "159", "161", 
    "163", "165", "167", "169", "173", "175", "183", "185", 
    "187", "201", "217", "223", "400", "253", "269", "329", 
    "461", "563", "607", "727", "741", "751", "773", "825")


var <- c("run_kv", "VALGT_JN")
lab <- c("Running for \nmunicipality", 
         "Elected for \nmunicipality")

data_out   <- data_frame()
data_model <- data_frame()

for(k in seq(1993, 2013, by = 4)){
  # remove bottom 0.1% and top 0.1% to avoid huge outliers. 
  data_an <- 
    data_comp_local %>% 
    filter(four_years == k) %>% 
    filter(in_year == 1) %>% 
    filter(inc_res < quantile(inc_res, p = 0.999) &
             inc_res > quantile(inc_res, p = 0.001) ) 
  
  bef <- 
    read.fst(paste("bef", k, ".fst", sep = ""))  
  
  data_an <- 
    left_join(data_an, bef, by = "PNR")
  
  mun_comp <- 
    data_an %>% 
    group_by(KOM) %>% 
    summarise(kom_comp = mean(inc_res, na.rm = TRUE))
  
  data_an <- 
    left_join(data_an, mun_comp, by = "KOM")
  
  for(j in 1:2){
    data_merg <- 
      data_an %>% 
      filter(.[ , var[j]] == 1) %>%
      filter(!KOM %in% kom_cont) 
    
    data_cont <- 
      data_an %>% 
      filter(.[ , var[j]] == 1) %>%
      filter(KOM %in% kom_cont)
    
    data_out_temp <-
      data_frame(year = k,
                 lab  = lab[j],
                 mean_merg  = mean(data_merg$inc_res),
                 mean_cont  = mean(data_cont$inc_res),
                 mean_merg_kom  = mean(data_merg$inc_res - data_merg$kom_comp),
                 mean_cont_kom  = mean(data_cont$inc_res - data_cont$kom_comp),
                 se_merg  = sd(data_merg$inc_res)/sqrt(sum(!is.na(data_merg$inc_res))),
                 se_cont  = sd(data_cont$inc_res)/sqrt(sum(!is.na(data_cont$inc_res))),
                 se_merg_kom  = sd(data_merg$inc_res - data_merg$kom_comp)/sqrt(sum(!is.na(data_merg$inc_res))),
                 se_cont_kom  = sd(data_cont$inc_res - data_cont$kom_comp)/sqrt(sum(!is.na(data_cont$inc_res))))
    
    data_out <- 
      bind_rows(data_out, data_out_temp)
    
    data_year_out <- 
      bind_rows(data_merg %>% 
                  select(c("PNR", "inc_res", "kom_comp", "KOM")) %>% 
                  mutate(year = k,
                         type = var[j], 
                         merg = 1),
                data_cont %>% 
                  select(c("PNR", "inc_res", "kom_comp", "KOM")) %>% 
                  mutate(year = k,
                         type = var[j], 
                         merg = 0))
    
    data_model <- 
      bind_rows(data_model, data_year_out)
  }
  print(k)
  print(Sys.time())
}


# find FEs for old municipalities
bef2006 <- 
  read.fst(paste("bef", 2006, ".fst", sep = ""))  
bef2007 <- 
  read.fst(paste("bef", 2007, ".fst", sep = ""))  

bef2007 <- 
  filter(bef2007, KOM != 411) %>% 
  mutate(nykom = KOM) %>% 
  select(c("PNR", "nykom"))

bef2006 <-
  left_join(bef2006, bef2007, by = "PNR") %>% 
  filter(!is.na(nykom))

nykom <- 
  bef2006 %>% 
  group_by(KOM) %>% 
  summarise(nykom = Mode(nykom, na.rm =TRUE))

data_model <- 
  left_join(data_model, nykom, by ="KOM")



data_plot <- 
  melt(as.data.frame(data_out), 
       id = c("year", "lab"))


data_points <- 
  data_plot %>% 
  filter(variable == "mean_merg" | 
           variable == "mean_cont") %>% 
  mutate(estimate = value,
         variable = case_when(variable == "mean_merg" ~ "merg",
                              variable == "mean_cont" ~ "cont"))  %>%
  select(c("year", "lab", "variable", "estimate"))
  

data_ses <- 
  data_plot %>% 
  filter(variable == "se_merg" | 
           variable == "se_cont") %>% 
  mutate(se = value,
         variable = case_when(variable == "se_merg" ~ "merg",
                              variable == "se_cont" ~ "cont"))  %>%
  select(c("year", "lab", "variable", "se"))

data_plot1 <- 
  left_join(data_points, data_ses) %>% 
  mutate(year = ifelse(variable == "merg", year - 0.2, year + 0.2))

data_points_kom <- 
  data_plot %>% 
  filter(variable == "mean_merg_kom" | 
           variable == "mean_cont_kom") %>% 
  mutate(estimate = value, 
         variable = case_when(variable == "mean_merg_kom" ~ "merg",
                              variable == "mean_cont_kom" ~ "cont"))  %>%
  select(c("year", "lab", "variable", "estimate")) 

data_ses_kom <- 
  data_plot %>% 
  filter(variable == "se_merg_kom" | 
           variable == "se_cont_kom") %>% 
  mutate(se = value,
         variable = case_when(variable == "se_merg_kom" ~ "merg",
                              variable == "se_cont_kom" ~ "cont"))  %>%
  select(c("year", "lab", "variable", "se")) 

data_plot2 <- 
  left_join(data_points_kom, data_ses_kom) %>% 
  mutate(year = ifelse(variable == "mean_merg_kom", year - 0.2, year + 0.2))


data_plot1 <- 
  data_plot1 %>% 
  mutate(lab = as.factor(lab),
         lab = factor(lab, levels = levels(lab)[c(2,1)]))

plot1 <- 
  ggplot(data = data_plot1,
         aes(x = year,
             y = estimate,
             ymin = estimate + qnorm(0.025) * se,
             ymax = estimate + qnorm(0.975) * se,
             alpha = variable)) + 
  facet_grid(lab ~ ., scales = "free_y") +
  geom_errorbar(width = 0) + 
  geom_point() + 
  theme_classic() +
  scale_y_continuous("Income Score") + 
  scale_x_continuous("Year", breaks = seq(1993, 2013, 4)) +
  geom_vline(xintercept = 2003, linetype = "dashed", alpha = 0.5) + 
  scale_alpha_discrete("", range = c(0.25, 1), 
                       labels = c("Continuing \nmunicipalities",
                                  "Amalgamated \nmunicipalities")) +
  theme(panel.spacing = unit(2, "lines"))


data_plot2 <- 
  data_plot2 %>% 
  mutate(lab = as.factor(lab),
         lab = factor(lab, levels = levels(lab)[c(2,1)]))

plot2 <- 
  ggplot(data = data_plot2,
         aes(x = year,
             y = estimate,
             ymin = estimate + qnorm(0.025) * se,
             ymax = estimate + qnorm(0.975) * se,
             alpha = variable)) + 
  facet_grid(lab ~ ., scales = "free_y") +
  geom_errorbar(width = 0) + 
  geom_point() + 
  theme_classic() +
  scale_y_continuous("Income Score") + 
  scale_x_continuous("Year", breaks = seq(1993, 2013, 4)) +
  geom_vline(xintercept = 2003, linetype = "dashed", alpha = 0.5) + 
  scale_alpha_discrete("", range = c(0.25, 1), 
                       labels = c("Continuing \nmunicipalities",
                                  "Amalgamated \nmunicipalities")) +
  theme(panel.spacing = unit(2, "lines"))

ggsave(plot1, width = 9, height = 6)
